##############################
# STATE REACH AND DEVELOPMENT // PSRM // Replication
#
# Descriptive Statistics
# Sourced from "code/analysis_main.R"
#
##############################

# Locals
desc.data.names <- c(`DHS education data (Personal Recode)` = "educ",
                     `DHS infant mortality data (Children Recode)` = "infmort", 
                     `Nightlight data (Voronoi cells, 400 km$^2$)` = "nightlight")


# SUMMARY STATISTICS  TABLES A2--A4 ################
for(dn in desc.data.names){
  # Set table name
  stub <- paste0("statedev_sumtab_", dn)
  
  # data
  this.data <- data.ls[[dn]]
  
  # Vars
  these.vars <- c(get_depvar(dn), get_contr(dn), treat.vars)
  these.vars <- these.vars[these.vars %in% colnames(this.data)]
  this.data <- this.data[!is.na(this.data[,get_depvar(dn)]) & 
                           !is.na(this.data[,treat.vars[1]]) & 
                           !is.na(this.data[,treat.vars[2]]),]
  this.data <- this.data[,these.vars]
  
  # Make table
  fileConn <- file(file.path(tab.path, paste0("taba", which(desc.data.names == dn) + 1, "_", stub, ".tex")))
  writeLines(
    stargazer(this.data,
              title=paste0("Summary statistics: ",names(desc.data.names[desc.data.names == dn])),
              covariate.labels = names(these.vars),
              notes.align = "l",label= stub, align = T,
              digits = 2,  rownames = F, digit.separate = 0, column.sep.width ="-10pt",
              type  = "latex"), 
    fileConn)
  close(fileConn)
}

# TABLE A5 OF DATA SOURCES #############################

# Extract rounds and years
data.sources.ls <- lapply(c(desc.data.names), function(dn){
  df <- data.ls[[dn]]
  
  if(dn == "nightlight"){
    df <- unique(df[!is.na(df$regcap) & !is.na(df$natcap),c("cowcode", "year")])
    df <- aggregate.data.frame(list(samples= df$year), list(cowcode = df$cowcode),
                               FUN = function(x){paste0(min(x),"--", max(x))})
  } else {
    if(dn == "educ"){
      df <- unique(df[!is.na(df$regcap) & !is.na(df$natcap),c("cowcode", "dhs_round", "dhs_subround")])
      df$round <- paste0(df$dhs_round, ".", df$dhs_subround)
    } else {
      df <- unique(df[!is.na(df$regcap) & !is.na(df$natcap),c("cowcode", "dhs_round", "dhs_subround")])
      df$round <- paste0(df$dhs_round, ".", df$dhs_subround)
    }
    df <- aggregate.data.frame(list(samples= df$round), list(cowcode = df$cowcode),
                               FUN = function(x){
                                 x <- as.character(x)
                                 if(length(x) > 4){
                                   x <- x[order(x)]
                                   x[1:(length(x)-1)] <- paste0(x[1:(length(x)-1)], ",")
                                   x[seq(4, length(x), by = 4)] <- paste0(x[seq(4, length(x), by = 4)], "\\lb")
                                   paste0("\\makecell[l]{",paste(x, collapse = " "), "}")
                                 } else {
                                   paste(x[order(x)], collapse = ", ")
                                 }
                                 })
  }
  colnames(df)[colnames(df) == "samples"] <- dn
  df
})

# Join all
data.sources.df <- data.sources.ls[[1]]
for(i in 2:length(data.sources.ls)){
  data.sources.df <- join(data.sources.df, data.sources.ls[[i]], type = "full")
}

# Add countrynames
col.names <- c("Adults", "Children", "Nightlight-cells")
colnames(data.sources.df)[colnames(data.sources.df) %in% desc.data.names] <- col.names
data.sources.df$Country <- countrycode(data.sources.df$cowcode, origin = "cown", destination = "country.name")
data.sources.df$Country[data.sources.df$Country == "Democratic Republic of the Congo"] <- "DR Congo"
data.sources.df$Country[data.sources.df$Country == "Gambia (Islamic Republic of the)"] <- "Gambia"
data.sources.df$Country[data.sources.df$Country == "United Republic of Tanzania"] <- "Tanzania"

# Sort Table by Country
data.sources.df <- data.sources.df[,c("Country", col.names)]
data.sources.df <- data.sources.df[order(data.sources.df$Country),]

# Notes
table.notes <- paste0("\\parbox[t]{1\\textwidth}{Note that the divergence between the samples used from the DHS stems from the fact that
                      not all surveys enlist the level of education of household members or come with
                      the Child Recode file needed to derive infant mortality rates.}")

# Make Table
sg.table <- stargazer(data.sources.df,
                      title=paste0("Samples across data sources, DHS rounds and nightlight observations"),
                      summary = F, 
                      notes.align = "l",label= "statedev_samples", align = F,
                      digits = 2,  rownames = F, digit.separate = 0, column.sep.width ="-3pt",
                      type  = "latex", notes = table.notes, notes.label = "")
sg.table <- gsub("\\textasteriskcentered ", "$^*$", sg.table, fixed = T)
sg.table <- gsub("\\textbackslash ", "\\", sg.table, fixed = T)
sg.table <- gsub("ccccc", "lllll", sg.table, fixed = T)
sg.table <- gsub("\\{", "{", sg.table, fixed = T)
sg.table <- gsub("\\}", "}", sg.table, fixed = T)


# Print table
fileConn <- file(file.path(tab.path, paste0("taba5_statedev_samples", ".tex")))
writeLines(sg.table, fileConn)
close(fileConn)



# FIGURE 4: VORONOI POLYGONS #############

# Load data

## Admin units
admin.shp <- readOGR(dsn = file.path("data", "africa_regions_panel.GeoJSON"), 
                     layer="africa_regions_panel")

## Uganda voronoi polygons
voronoi.shp <- readOGR(dsn = file.path("data", "voronoi_uga.GeoJSON"), 
                       layer="voronoi_uga")

## Uganda Lakes
this.lakes <- readOGR(dsn = file.path("data",  "lakes_uganda.GeoJSON"), 
                       layer="lakes_uganda")

# Uganda Cowcode
plot.cow <- 500

# Subset admin units to Uganda
this.adm.shp <- admin.shp[admin.shp$cowcode == plot.cow,]

# Get color for nightlight intensity, avg over all years
nl.values <- data.ls[["nightlight"]][data.ls[["nightlight"]]$cowcode == plot.cow & 
                               !is.na(data.ls[["nightlight"]]$cowcode),
                                               c("id", "nightlight.pc.log", "natcap")]
nl.values <- aggregate.data.frame(nl.values[, c("nightlight.pc.log", "natcap"), drop = F],
                                  nl.values[,c("id"), drop = F], FUN = mean)

# Join with Voronoi Cells
voronoi.shp@data <- join(voronoi.shp@data, 
                         nl.values, 
                         type = "left", by = "id")

# Color scale
voronoi.shp@data$nl.fac <- as.numeric(cut(voronoi.shp@data$nightlight.pc.log, 256))
colpal <-  viridisLite::inferno(n = 256)
voronoi.shp$color <- colpal[voronoi.shp@data$nl.fac]

# Missing polygons (0 population)
missing.id <- which(is.na(voronoi.shp@data$nl.fac))
missing.vor <- voronoi.shp[missing.id, ]

# Plot Main Setup
png(file.path(fig.path, paste0("fig4_voronoi_setup.png")), width = 4, height = 4, units = "in", res = 300)
par(mar = c(0,0,0,0))
plot(this.adm.shp, col = "transparent", border = "white", lwd = .5)
plot(voronoi.shp, add = T,
     col = "transparent", border = "black", lwd = .8)
plot(missing.vor, 
     col = "transparent", border = "black", add = T, lwd = .8)
plot(this.adm.shp, col = "transparent", border = "red", add = T, lwd = .8)
plot(this.lakes, col = "darkturquoise", border = "transparent", add = T)
dev.off()


# Outcomes
png(file.path(fig.path, paste0("fig4_voronoi_outcomes.png")), width = 5, height = 4, units = "in", res = 300)
par(mar = c(0,0,0,0))
layout(matrix(1:2,ncol=2), width = c(4,1),height = c(1,1))
plot(this.adm.shp, col = "transparent", border = "white", lwd = .5)
plot(voronoi.shp, 
     col = voronoi.shp$color, border = "white", lwd = .2, add = T)
plot(missing.vor, 
     col = "grey", border = "white", add = T, lwd = .2)
plot(this.lakes, col = "darkturquoise", border = "transparent", add = T)
plot(this.adm.shp, col = "transparent", border = "white", add = T, lwd = .5)

# Legend
legend_image <- as.raster(matrix(rev(colpal), ncol=1))
plot(c(0,2),c(min(voronoi.shp$nightlight.pc.log, na.rm = T), 
              max(voronoi.shp$nightlight.pc.log, na.rm = T)),
     type = 'n', axes = F,xlab = '', ylab = '', main = '')
axis(side = 4,pos = 0.5, at = seq(-6.5, -4.5, by = .5))
rasterImage(legend_image, 0,min(voronoi.shp$nightlight.pc.log, na.rm = T), 
            0.5, max(voronoi.shp$nightlight.pc.log, na.rm = T))
dev.off()



# FIGURE A13 PLOT AVERAGE EDUCATION RATES ################



# Prepare Data

# ... country-borders (2016)
cshp <- as_Spatial(cshp(as.Date("2016-01-01")))
cshp <- cshp[!cshp$gwcode %in%  c(580, 581, 402, 403, 590, 591),]
cshp <- cshp[cshp$gwcode %in%  admin.shp$cowcode,]


# ... all of Africa
all.africa <- gUnaryUnion(cshp, id = rep(1, length(cshp)))

# ... education rates

# ... --- collapse to points
plot.colcodes <- unique(data.ls[["educ"]]$cowcode)
educ.pts <- data.ls[["educ"]]
educ.pts <- join(aggregate.data.frame(list(educ.prim = educ.pts$educ.prim),
                                      educ.pts[,c("longnum", "latnum")], FUN = sum, na.rm = T),
                 aggregate.data.frame(list(obs = educ.pts$educ.prim),
                                      educ.pts[,c("longnum", "latnum")], FUN = function(x){length(x[!is.na(x)])}),
                 type = "left", by = c("longnum", "latnum"))
coordinates(educ.pts) <- ~ longnum + latnum

# ... --- to raster
raster <- raster(ext = extent(cshp), res = .25)
raster <- stack(lapply(c("educ.prim", "obs"), function(v){
  raster::rasterize(educ.pts, raster, field = v, fun = sum)
}))
names(raster) <- c("educ.prim", "obs")
raster[["educ.prim"]][raster[["obs"]] == 0] <- NA

# Color palette
colpal <- inferno(n = 256, alpha = 1, begin = 0, end = 1, direction = 1)


# Plot
png(file.path(fig.path, "figa13_africa_educ_map.png" ), width = 5, height = 4.5, units = "in", res = 400)
par(mar = c(0,0,0,0))

## Main
raster::plot(raster[["educ.prim"]]/raster[["obs"]], useRaster = T, interpolate = F, axes=FALSE, bty="n", box=FALSE,
             col = colpal, legend = F, colNA = "grey", ylim = c(-34.7, 36.34041), xlim = c(-17.2,51.3))
plot(gDifference(as(extent(raster), "SpatialPolygons"), all.africa), border = "white", col = "white", add = T)
plot(cshp[!cshp$gwcode %in% plot.colcodes,], col = "white", border = "grey", add = T)
plot(cshp[cshp$gwcode %in% plot.colcodes,], col = "transparent", border = "black", add = T)
plot(all.africa, col = "transparent", border = "black", add = T)

## Legend
plot(raster[["educ.prim"]]/raster[["obs"]], legend.only=TRUE,  col = colpal,
     legend.width=1, legend.shrink=0.75,
     legend.args=list(text='Primary education rate (percent)', side=2, font=1, line=0, cex=.8),
     smallplot=c(0.08,.10, 0.1,.45))
dev.off()







# FIGURE A13 PLOT AVERGAGE INFANT MORTALITY ##############

# Prepare Data

# ... infant mortality

# ... --- collapse to points
plot.colcodes <- unique(data.ls[["infmort"]]$cowcode)
dead.pts <- data.ls[["infmort"]]
dead.pts <- join(aggregate.data.frame(list(dead = dead.pts$dead),
                                      dead.pts[,c("longnum", "latnum")], FUN = sum, na.rm = T),
                 aggregate.data.frame(list(obs = dead.pts$dead),
                                      dead.pts[,c("longnum", "latnum")], FUN = function(x){length(x[!is.na(x)])}),
                 type = "left", by = c("longnum", "latnum"))
coordinates(dead.pts) <- ~ longnum + latnum

# ... --- to raster
raster <- raster(ext = extent(cshp), res = .25)
raster <- stack(lapply(c("dead", "obs"), function(v){
  raster::rasterize(dead.pts, raster, field = v, fun = sum)
}))
names(raster) <- c("infmort", "obs")
raster[["infmort"]][raster[["obs"]] == 0] <- NA

# Color palette
colpal <- inferno(n = 256, alpha = 1, begin = 0, end = 1, direction = 1)


# Plot
png(file.path(fig.path, "figa13_africa_infmort_map.png" ), width = 5, height = 4.5, units = "in", res = 400)
par(mar = c(0,0,0,0))

## Main
raster::plot(raster[["infmort"]]/raster[["obs"]], useRaster = T, interpolate = F, axes=FALSE, bty="n", box=FALSE,
             col = rev(colpal), legend = F, colNA = "grey", ylim = c(-34.7, 36.34041), xlim = c(-17.2,51.3))
plot(gDifference(as(extent(raster), "SpatialPolygons"), all.africa), border = "white", col = "white", add = T)
plot(cshp[!cshp$gwcode %in% plot.colcodes,], col = "white", border = "grey", add = T)
plot(cshp[cshp$gwcode %in% plot.colcodes,], col = "transparent", border = "black", add = T)
plot(all.africa, col = "transparent", border = "black", add = T)

## Legend
plot(raster[["infmort"]]/raster[["obs"]], legend.only=TRUE,  col = rev(colpal),
     legend.width=1, legend.shrink=0.75,
     legend.args=list(text='Infant mortality (percent)', side=2, font=1, line=0, cex=.8),
     smallplot=c(0.08,.10, 0.1,.45))
dev.off()





